None
Lizemarie Wium
The Wellmum Clinic was established to identify XXX. The clinic provides follow-up care for patients who have given birth in the past XX months.
This document aims to answer the following questions:
# Import libraries
# General
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
# Load the Wellmum export from the Excel workbook.
df = pd.read_excel("WELLMUM_20230609.xlsx")

# Shorten the verbose spreadsheet headers to code-friendly column names.
# The keys must match the workbook headers exactly, including the trailing
# spaces in 'Mid upper arm circumstance ' and 'ID '.
df = df.rename(columns={
    'Gravidity (how many pregnancies have you had?)': 'Gravidity',
    'Parity (number of live births)': 'Parity',
    'random glucose': 'RandomGlucose',
    'AGE': 'Age',
    'Weight (kg)': 'Weight',
    'Mid upper arm circumstance ': 'MUAC',
    'ID ': 'Id',
    'Current BMI': 'BMI',
    'GDM control': 'GDMControl',
    'Maternal date of birth': 'MDOB',
    'Problems during pregnancy': 'Problems',
})

# Keep only the columns used by the analysis.  The explicit .copy() makes the
# subset an independent frame, so the column assignments below do not trigger
# pandas' SettingWithCopyWarning.
cols = ['Id', 'Age', 'Gravidity', 'Parity', 'BMI', 'Weight', 'Creatinine',
        'HbA1C', 'ACR', 'GDMControl', 'Race', 'RandomGlucose', 'eGFR', 'ALT',
        'MUAC', 'Hb', 'MDOB']
df = df[cols].copy()

# Normalise Race: upper-case, trim surrounding whitespace, then fold the
# 'EAST ASIAN' and 'SOUTH ASIAN' labels into the single 'ASIAN' category.
df['Race'] = df['Race'].str.upper().str.strip()
df.loc[df['Race'].isin(['EAST ASIAN', 'SOUTH ASIAN']), 'Race'] = 'ASIAN'

# Round BMI to two decimals and make sure HbA1C is stored as float.
df['BMI'] = df['BMI'].round(2)
df['HbA1C'] = df['HbA1C'].astype(float)

# Preview the first five cleaned rows.
print(df.head(5))
Id Age Gravidity Parity BMI Weight Creatinine HbA1C ACR \
0 WM001 34 2 1 24.85 59.7 69.0 37.0 7.4
1 WM002 44 6 1 22.31 66.0 66.0 39.0 9.3
2 WM003 30 1 1 22.73 58.2 63.0 40.0 91.3
3 WM004 29 1 1 33.52 106.8 47.0 35.0 NaN
4 WM005 36 3 3 28.04 76.8 59.0 37.0 6.6
GDMControl Race RandomGlucose eGFR ALT MUAC Hb \
0 Metformin WHITE 3.5 132.714169 NaN 30.0 132.0
1 Insulin MIXED 4.6 118.080000 NaN 27.0 70.0
2 Metformin BLACK 4.4 124.991429 NaN 28.0 139.0
3 Metformin WHITE 6.1 310.242638 NaN 29.0 124.0
4 Metformin WHITE 4.8 166.512814 NaN 32.0 141.0
MDOB
0 1988-08-10 00:00:00
1 1978-05-30 00:00:00
2 1992-05-18 00:00:00
3 1993-06-23 00:00:00
4 1986-06-12 00:00:00
# Number of rows per distinct Age value, as a two-column frame (Age, Counts).
age = df.groupby('Age').size().rename('Counts').reset_index()

# Bar chart of the age counts; each bar is labelled with its count and the
# subtitle reports the total number of rows in df.
fig = go.Figure(data=[go.Bar(x=age['Age'], y=age['Counts'], text=age['Counts'])])
fig.update_layout(
    title=dict(
        text=f"Age distribution<br><sup><i>(n={len(df)})</i></sup>",
        xref="paper",
        x=0,
    ),
    xaxis=dict(title=dict(text="Age<br><sup><i>years</i></sup>")),
    yaxis=dict(title=dict(text="Count")),
)
fig.show()
# Number of rows per Race category, as a two-column frame (Race, Counts).
race = df.groupby('Race').size().rename('Counts').reset_index()

# Bar chart of the race counts; each bar is labelled with its count and the
# subtitle reports the total number of rows in df.
fig = go.Figure(data=[go.Bar(x=race['Race'], y=race['Counts'], text=race['Counts'])])
fig.update_layout(
    title=dict(
        text=f"Race distribution<br><sup><i>(n={len(df)})</i></sup>",
        xref="paper",
        x=0,
    ),
    xaxis=dict(title=dict(text="Race")),
    yaxis=dict(title=dict(text="Count")),
)
fig.show()
# Number of rows per Parity value, as a two-column frame (Parity, Counts).
par = df.groupby('Parity').size().rename('Counts').reset_index()

# Bar chart of the parity counts; each bar is labelled with its count and the
# subtitle reports the total number of rows in df.
fig = go.Figure(data=[go.Bar(x=par['Parity'], y=par['Counts'], text=par['Counts'])])
fig.update_layout(
    title=dict(
        text=f"Parity distribution<br><sup><i>(n={len(df)})</i></sup>",
        xref="paper",
        x=0,
    ),
    xaxis=dict(
        title=dict(text="Parity<br><sup><i>number of live births</i></sup>"),
    ),
    yaxis=dict(title=dict(text="Count")),
)
fig.show()
# Flag rows whose Creatinine is above 80 (1) versus everything else (0).
# A missing Creatinine does not satisfy "> 80", so it ends up flagged 0.
df["Creat80"] = (df['Creatinine'] > 80).astype('int64')
sumc = df["Creat80"].sum()
print(f"Creatinine > 80: {round(sumc / len(df), 3)}")

# Variables shown in the scatter-plot matrix, in display order.
splom_columns = ['RandomGlucose', 'HbA1C', 'eGFR', 'Age', 'Hb', 'ALT',
                 'Creatinine', 'ACR']
# Legend label for each value of the Creat80 flag.
group_labels = ('Creatinine <= 80', 'Creatinine > 80')

# One Splom trace per flag value so the two groups get separate legend entries.
fig = go.Figure()
for tra, name in enumerate(group_labels):
    dft = df[df['Creat80'] == tra]
    fig.add_trace(go.Splom(
        dimensions=[dict(label=col, values=dft[col]) for col in splom_columns],
        diagonal_visible=False,  # remove plots on diagonal
        text=dft['Id'],  # Id is attached to each point as its text
        name=name,
        marker=dict(
            color=tra,
            showscale=False,  # colors encode categorical variables
            line=dict(color='white', width=0.5),
        ),
    ))

# Subtitle: number and percentage of rows flagged as Creatinine > 80.
fig.update_layout(
    title=f"Creatinine > 80<br><sup><i>(n={sumc}, {round(sumc / len(df) * 100, 2)}%)</i></sup>",
    showlegend=True,
    width=1000,
    height=1000,
)
fig.show()
Creatinine > 80: 0.117
# Notched box plot of Creatinine, one box per Race category.
fig = go.Figure()
# Iterating the groupby skips rows with a missing Race, so no empty "nan" box
# is added.  sort=False keeps the categories in order of first appearance,
# matching the order df.Race.unique() would give.
for race, dft in df.groupby('Race', sort=False):
    # Box label carries the number of rows in this race group.
    name = f"{race}<br><sup><i>(n={len(dft)})</i></sup>"
    fig.add_trace(go.Box(y=dft['Creatinine'], name=name, notched=True))

# Subtitle reports the total number of rows in df; the legend is hidden
# because every box is already labelled on the x axis.
fig.update_layout(
    title=f"Creatinine spread by race<br><sup><i>(n={len(df)})</i></sup>",
    showlegend=False,
)
fig.show()
# Flag rows whose eGFR is below 90 (1) versus everything else (0).
# A missing eGFR does not satisfy "< 90", so it ends up flagged 0.
df["eGFR90"] = (df['eGFR'] < 90).astype('int64')
sumc = df["eGFR90"].sum()
print(f"eGFR < 90: {round(sumc / len(df), 3)}")

# Variables shown in the scatter-plot matrix, in display order.
splom_columns = ['RandomGlucose', 'HbA1C', 'eGFR', 'Age', 'Hb', 'ALT',
                 'Creatinine', 'ACR']
# Legend label for each value of the eGFR90 flag.
group_labels = ('eGFR >= 90', 'eGFR < 90')

# One Splom trace per flag value so the two groups get separate legend entries.
fig = go.Figure()
for tra, name in enumerate(group_labels):
    dft = df[df['eGFR90'] == tra]
    fig.add_trace(go.Splom(
        dimensions=[dict(label=col, values=dft[col]) for col in splom_columns],
        diagonal_visible=False,  # remove plots on diagonal
        text=dft['Id'],  # Id is attached to each point as its text
        name=name,
        marker=dict(
            color=tra,
            showscale=False,  # colors encode categorical variables
            line=dict(color='white', width=0.5),
        ),
    ))

# Subtitle: number and percentage of rows flagged as eGFR < 90.
fig.update_layout(
    title=f"eGFR < 90<br><sup><i>(n={sumc}, {round(sumc / len(df) * 100, 2)}%)</i></sup>",
    showlegend=True,
    width=1000,
    height=1000,
)
fig.show()
eGFR < 90: 0.027
# Notched box plot of eGFR, one box per Race category.
fig = go.Figure()
# Iterating the groupby skips rows with a missing Race, so no empty "nan" box
# is added.  sort=False keeps the categories in order of first appearance,
# matching the order df.Race.unique() would give.
for race, dft in df.groupby('Race', sort=False):
    # Box label carries the number of rows in this race group.
    name = f"{race}<br><sup><i>(n={len(dft)})</i></sup>"
    fig.add_trace(go.Box(y=dft['eGFR'], name=name, notched=True))

# Subtitle reports the total number of rows in df; the legend is hidden
# because every box is already labelled on the x axis.
fig.update_layout(
    title=f"eGFR spread by race<br><sup><i>(n={len(df)})</i></sup>",
    showlegend=False,
)
fig.show()
# Variables included in the correlation heatmap (used for both axes).
columns = ['HbA1C', 'BMI', 'MUAC', 'Creatinine', 'ALT', 'eGFR']

# DataFrame.corr computes the pairwise correlation of every column pair in
# one call, replacing the manual double loop over Series.corr.  Like
# Series.corr, it leaves out missing values pair by pair.  Rounded to four
# decimals and converted to nested lists for plotly.
values = df[columns].corr().round(4).values.tolist()

# Heatmap with the rounded coefficient printed inside each cell.
fig = go.Figure(data=go.Heatmap(
    z=values,
    x=columns,
    y=columns,
    text=values,
    texttemplate="%{text}",
    hoverongaps=True,
))
# Subtitle reports the total number of rows in df.
fig.update_layout(
    title=f"Correlation matrix<br><sup><i>(n={len(df)})</i></sup>",
    showlegend=False,
)
fig.show()
# Scatter of MUAC against BMI, one marker trace per Race category.
fig = go.Figure()
# Correlation over all rows of df, shown in the chart subtitle.
corra = round(df['BMI'].corr(df['MUAC']), 4)

# Iterating the groupby skips rows with a missing Race, so no empty "nan"
# trace is added to the legend.  sort=False keeps the categories in order of
# first appearance, matching the order df.Race.unique() would give.
for race, dft in df.groupby('Race', sort=False):
    # Correlation within this race group, shown in its legend entry.
    corr = round(dft['BMI'].corr(dft['MUAC']), 4)
    name = f"{race}<br><sup><i>(correlation={corr})</i></sup>"
    fig.add_trace(go.Scatter(
        x=dft['BMI'],
        y=dft['MUAC'],
        name=name,
        mode='markers',
    ))

# Marker styling shared by every trace (the mode is already set per trace).
fig.update_traces(marker_line_width=1, marker_size=10)
fig.update_layout(
    title=dict(
        text=f"MUAC vs BMI<br><sup><i>(correlation={corra})</i></sup>",
        xref="paper",
        x=0,
    ),
    xaxis=dict(
        title=dict(text="BMI<br><sup><i>Body Mass Index</i></sup>"),
    ),
    yaxis=dict(
        title=dict(text="MUAC<br><sup><i>Mid Upper Arm Circumference</i></sup>"),
    ),
)
fig.show()
# Scatter of HbA1C against Random Glucose, one marker trace per Race category.
fig = go.Figure()

# Only rows with HbA1C between 18 and 50 (both inclusive) are plotted.  The
# filter is applied once, up front, so the overall correlation in the
# subtitle is computed on the same rows as the points and the per-race
# correlations, not on the unfiltered data.
in_range = df[df['HbA1C'].between(18, 50)]
corra = round(in_range['RandomGlucose'].corr(in_range['HbA1C']), 4)

# Race categories are taken from the full frame so trace order stays the
# same as before; dropna() avoids an empty "nan" trace when Race is missing.
for race in df['Race'].dropna().unique():
    dft = in_range[in_range['Race'] == race]
    # Correlation within this race group, shown in its legend entry.
    corr = round(dft['RandomGlucose'].corr(dft['HbA1C']), 4)
    name = f"{race}<br><sup><i>(correlation={corr})</i></sup>"
    fig.add_trace(go.Scatter(
        x=dft['RandomGlucose'],
        y=dft['HbA1C'],
        name=name,
        mode='markers',
    ))

# Marker styling shared by every trace (the mode is already set per trace).
fig.update_traces(marker_line_width=1, marker_size=10)
fig.update_layout(
    title=dict(
        text=f"HbA1C vs Random Glucose<br><sup><i>(correlation={corra})</i></sup>",
        xref="paper",
        x=0,
    ),
    xaxis=dict(
        title=dict(text="Random Glucose<br><sup><i> </i></sup>"),
    ),
    yaxis=dict(
        title=dict(text="HbA1C<br><sup><i> </i></sup>"),
    ),
)
fig.show()